knitr::opts_chunk$set(echo = TRUE, message = FALSE)
library(Seurat)
library(ggplot2)
library(data.table)
library(MAST)
library(SingleR)
library(dplyr)
library(tidyr)
library(limma)
library(ggrepel)
sessionInfo()
## R version 4.0.2 (2020-06-22)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS Catalina 10.15.4
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRblas.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRlapack.dylib
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
## [1] parallel  stats4    stats     graphics  grDevices utils     datasets 
## [8] methods   base     
## 
## other attached packages:
##  [1] ggrepel_0.8.2               limma_3.44.3               
##  [3] tidyr_1.1.1                 dplyr_1.0.2                
##  [5] SingleR_1.2.4               MAST_1.14.0                
##  [7] SingleCellExperiment_1.10.1 SummarizedExperiment_1.18.2
##  [9] DelayedArray_0.14.1         matrixStats_0.56.0         
## [11] Biobase_2.48.0              GenomicRanges_1.40.0       
## [13] GenomeInfoDb_1.24.2         IRanges_2.22.2             
## [15] S4Vectors_0.26.1            BiocGenerics_0.34.0        
## [17] data.table_1.13.0           ggplot2_3.3.2              
## [19] Seurat_3.2.0               
## 
## loaded via a namespace (and not attached):
##   [1] AnnotationHub_2.20.1          BiocFileCache_1.12.1         
##   [3] plyr_1.8.6                    igraph_1.2.5                 
##   [5] lazyeval_0.2.2                splines_4.0.2                
##   [7] BiocParallel_1.22.0           listenv_0.8.0                
##   [9] digest_0.6.25                 htmltools_0.5.0              
##  [11] magrittr_1.5                  memoise_1.1.0                
##  [13] tensor_1.5                    cluster_2.1.0                
##  [15] ROCR_1.0-11                   globals_0.12.5               
##  [17] colorspace_1.4-1              blob_1.2.1                   
##  [19] rappdirs_0.3.1                xfun_0.16                    
##  [21] crayon_1.3.4                  RCurl_1.98-1.2               
##  [23] jsonlite_1.7.0                spatstat_1.64-1              
##  [25] spatstat.data_1.4-3           survival_3.2-3               
##  [27] zoo_1.8-8                     ape_5.4-1                    
##  [29] glue_1.4.1                    polyclip_1.10-0              
##  [31] gtable_0.3.0                  zlibbioc_1.34.0              
##  [33] XVector_0.28.0                leiden_0.3.3                 
##  [35] BiocSingular_1.4.0            future.apply_1.6.0           
##  [37] abind_1.4-5                   scales_1.1.1                 
##  [39] DBI_1.1.0                     miniUI_0.1.1.1               
##  [41] Rcpp_1.0.5                    viridisLite_0.3.0            
##  [43] xtable_1.8-4                  reticulate_1.16              
##  [45] bit_4.0.4                     rsvd_1.0.3                   
##  [47] htmlwidgets_1.5.1             httr_1.4.2                   
##  [49] RColorBrewer_1.1-2            ellipsis_0.3.1               
##  [51] ica_1.0-2                     pkgconfig_2.0.3              
##  [53] uwot_0.1.8                    dbplyr_1.4.4                 
##  [55] deldir_0.1-28                 tidyselect_1.1.0             
##  [57] rlang_0.4.7                   reshape2_1.4.4               
##  [59] later_1.1.0.1                 AnnotationDbi_1.50.3         
##  [61] munsell_0.5.0                 BiocVersion_3.11.1           
##  [63] tools_4.0.2                   generics_0.0.2               
##  [65] RSQLite_2.2.0                 ExperimentHub_1.14.1         
##  [67] ggridges_0.5.2                evaluate_0.14                
##  [69] stringr_1.4.0                 fastmap_1.0.1                
##  [71] yaml_2.2.1                    goftest_1.2-2                
##  [73] knitr_1.29                    bit64_4.0.2                  
##  [75] fitdistrplus_1.1-1            purrr_0.3.4                  
##  [77] RANN_2.6.1                    pbapply_1.4-3                
##  [79] future_1.18.0                 nlme_3.1-148                 
##  [81] mime_0.9                      compiler_4.0.2               
##  [83] plotly_4.9.2.1                curl_4.3                     
##  [85] png_0.1-7                     interactiveDisplayBase_1.26.3
##  [87] spatstat.utils_1.17-0         tibble_3.0.3                 
##  [89] stringi_1.4.6                 lattice_0.20-41              
##  [91] Matrix_1.2-18                 vctrs_0.3.2                  
##  [93] pillar_1.4.6                  lifecycle_0.2.0              
##  [95] BiocManager_1.30.10           lmtest_0.9-37                
##  [97] RcppAnnoy_0.0.16              BiocNeighbors_1.6.0          
##  [99] cowplot_1.0.0                 bitops_1.0-6                 
## [101] irlba_2.3.3                   httpuv_1.5.4                 
## [103] patchwork_1.0.1               R6_2.4.1                     
## [105] promises_1.1.1                KernSmooth_2.23-17           
## [107] gridExtra_2.3                 codetools_0.2-16             
## [109] MASS_7.3-52                   assertthat_0.2.1             
## [111] withr_2.2.0                   sctransform_0.2.1            
## [113] GenomeInfoDbData_1.2.3        mgcv_1.8-31                  
## [115] grid_4.0.2                    rpart_4.1-15                 
## [117] rmarkdown_2.3                 DelayedMatrixStats_1.10.1    
## [119] Rtsne_0.15                    shiny_1.5.0
#wbm <- readRDS('./data/v2/combined.integrated.rds')
wbm <- readRDS('./data/V2/lesser.combined.integrated.rds')

Introduction

In v2 of the analysis we decided to include the control mice from the Nbeal experiment with the Migr1 and Mpl mice. The thought is that it may be good to have another control, since the Migr1 control has irradiated and had a bone marrow transplantation. I’m going to split the Rmarkdown files into separate part, to better organize my analysis.

This File

Here I’m gather the cluster centroids for the different 15 different clusters before and after cca correction.

Before and After Cluster Centroids

Getting cluster centroids before cca (‘RNA’ assay) and after cca (‘integrated’ assay)

# Getting cluster centroids for all assays
av.expression <- AverageExpression(wbm)

# Subsetting the RNA assay data frame to only genes in integrated assay
av.expression$RNA <- av.expression$RNA[rownames(av.expression$RNA) %in% rownames(av.expression$integrated),]

# Adding distinguishing names to merge into one data frame
colnames(av.expression$integrated) <- paste0("Integrated-",0:14)
colnames(av.expression$RNA) <- paste0("RNA-", 0:14)

# Combining into one data frame
av.exp <- cbind(av.expression$RNA, av.expression$integrated)

# Writing it out to a table for Jun to analyze
# write.table(av.exp, './data/v2/cluster.centroids.before.after.cca.txt', sep = '\t', quote = F)

PCA

Wanted to see how the centroids distinguished themselves

pca.av.exp <- prcomp(av.exp)

print(pca.av.exp$sdev)
##  [1] 87.7330768 43.4970733 42.4476551 40.7532825  8.3004814  8.1825628
##  [7]  6.7821755  6.5915510  5.5046258  5.3172070  4.0254913  3.9634890
## [13]  2.8131370  2.6846944  2.2243166  2.1660917  2.1487000  2.0454804
## [19]  1.9819829  1.9172511  1.6886617  1.6613350  1.5123164  1.4562874
## [25]  1.3314026  1.2987749  0.3695226  0.3533690  0.2377896  0.2339423
pca.av.exp <- as.data.frame(pca.av.exp$rotation)

pca.av.exp$assay <- tstrsplit(rownames(pca.av.exp),'-')[[1]]
pca.av.exp$cluster <- tstrsplit(rownames(pca.av.exp),'-')[[2]]

ggplot(pca.av.exp, aes(x = PC1, y = PC2, color = assay)) +
        geom_point() + theme_bw()

ggplot(pca.av.exp, aes(x = PC1, y = PC2, color = cluster,label = cluster)) +
        geom_point() + theme_bw() + geom_text_repel()

# DimPlot(wbm, reduction = 'umap', label = T, repel = T)
# 
ggplot(pca.av.exp, aes(x = PC3, y = PC4, label = cluster, color = assay)) +
        geom_point () +
        geom_text_repel() +
        theme_bw()

# ggplot(pca.av.exp, aes(x = PC4, y = PC5, label = cluster, color = assay)) +
#         geom_point () +
#         geom_text_repel() + 
#         theme_bw()
# 
# ggplot(pca.av.exp, aes(x = PC6, y = PC7, label = cluster, color = assay)) +
#         geom_point () +
#         geom_text_repel() + 
#         theme_bw()

It seems that each PC is distinguishing a particular cluster. This does make sense because that’s in general how the clusters were created.

Before and After Cluster Centroids by Experiment/Condition

Doing the same thing as before but further dividing the centroids for each condition (enriched or normal; Migr1, Mpl, Nbeal)

# Getting cluster centroids for each experiments for all assay
av.expression.by.condition <- AverageExpression(wbm, add.ident = 'Condition')

# Subsetting the RNA assay data frame to only genes in integrated assay
av.expression.by.condition$RNA <- 
        av.expression.by.condition$RNA[rownames(av.expression.by.condition$RNA) %in%
                                               rownames(av.expression.by.condition$integrated),]

# Getting unique colnames
colnames(av.expression.by.condition$integrated) <- paste0("Integrated-",colnames(av.expression.by.condition$integrated))
colnames(av.expression.by.condition$RNA) <- paste0("RNA-", colnames(av.expression.by.condition$RNA))

# Combining into one data frame
av.exp.by.cond <- cbind(av.expression.by.condition$RNA, av.expression.by.condition$integrated)

#  Writing it out to a table for Jun to use
# write.table(av.exp.by.cond, './data/v2/cluster.centroids.before.after.cca.by.condition.txt', 
#             sep = '\t',
#             quote = F)

PCA

pca.av.exp <- prcomp(av.exp.by.cond)

print(pca.av.exp$sdev[1:30])
##  [1] 194.342051 147.302812 113.007868 108.799772  47.365515  45.667525
##  [7]  37.517217  34.021108  20.035359  19.962408  19.017627  18.415837
## [13]  14.399119  13.760190  13.601906  12.290127   9.821881   9.345166
## [19]   7.717798   7.508813   7.260316   6.360444   6.298828   5.864222
## [25]   5.698218   5.423408   5.351688   5.040190   4.761992   4.631636
pca.av.exp <- as.data.frame(pca.av.exp$rotation)

pca.av.exp$assay <- tstrsplit(rownames(pca.av.exp),'-')[[1]]
pca.av.exp$cluster <- tstrsplit(tstrsplit(rownames(pca.av.exp),'-')[[2]],'_')[[1]]
pca.av.exp$condition <- tstrsplit(tstrsplit(rownames(pca.av.exp),'-')[[2]],'_')[[2]]

ggplot(pca.av.exp, aes(x = PC1, y = PC2, color = cluster, shape = condition)) +
        geom_point() + theme_bw()

Comparing Clusters from original data to integreated data

So doing each of these separately (Mpl/Migr1 and Nbeal) and getting clusters. For each cluster getting the cell identity and cluster centroids. Then compare these to the final clusters to see how centroids compare. Do some original clusters get split into different centroids, or just combine across experiments easily.

Also going to get cell count cross tabulation from clusters before and after integration

For generating the clusters, I’m not sure if it’s better to have more or less clusters in the end. Going to generate two sets of clusters to then compare downstream.

# Most of this is being copied from v2.1.Integration.UMAP.Rmd
# Except here I am also generating clusters integration

wbm.data <- Read10X(data.dir = './data/filtered_feature_bc_matrix//')

# Creating Seurat Object

wbm <- CreateSeuratObject(counts = wbm.data, project = 'Mpl', min.cells = 3, min.features = 200)

# Reading in the HTO labels provided by Dr. Brian Parkin

htos <- read.csv('./data/HTOs.csv')

#dim(htos)

# Removing all the cells without a label

htos <- htos[htos$HTOs != '',]

#dim(htos)

# Adding the conditioni from the HTO tagged, from an e-mail from Priya

htos$condition <- ifelse(htos$HTOs == 'HTO-1', 'Mpl',
                         ifelse(htos$HTOs == 'HTO-2', 'enrMpl',
                                ifelse(htos$HTOs == 'HTO-3', 'Migr1','enrMigr1')))

# Subsetting the two data frames so they only include cells that overlap

wbm <- wbm[,colnames(wbm) %in% htos$Barcode]

htos <- htos[htos$Barcode %in% colnames(wbm),]

# Making sure the cell order is maintained between the two dataframes, so I can
# just add the condition to the meta data

#summary(rownames(wbm@meta.data) == htos$Barcode)

# Adding the condition to the meta data

wbm@meta.data$Condition <- htos$condition

wbm[['percent.mt']] <- PercentageFeatureSet(wbm, pattern = '^mt')

less.subset.wbm <- subset(wbm, subset = nFeature_RNA > 500 & percent.mt < 10)
cnt.data <- Read10X(data.dir = './data/Experiment2/filtered_feature_bc_matrix/')

cnt <- CreateSeuratObject(counts = cnt.data, project = 'Nbeal', min.cells = 3, min.features = 200)

# Getting the HTOs

nbeal_hto <- read.table('./data/Experiment2/hto_labels.txt')

nbeal_hto <- nbeal_hto[nbeal_hto$V2 %in% c('HTO3','HTO4'),]

nbeal_hto$condition <- ifelse(nbeal_hto$V2 == 'HTO3', 'Nbeal_cntrl', 'enrNbeal_cntrl')

nbeal_hto$cell <- paste0(nbeal_hto$V1, '-1')

# summary(nbeal_hto$cell %in% colnames(cnt))

cnt <- cnt[,colnames(cnt) %in% nbeal_hto$cell]

# Making sure the cell order is maintained between the two dataframes, so I can
# just add the condition to the meta data

#summary(rownames(wbm@meta.data) == htos$Barcode)

# Adding the condition to the meta data

cnt@meta.data$Condition <- nbeal_hto$condition

cnt[['percent.mt']] <- PercentageFeatureSet(cnt, pattern = '^mt')

less.cnt <- subset(cnt, subset = nFeature_RNA > 500 & percent.mt < 10)

Mpl/Migr1 Experiment

less.subset.wbm <- NormalizeData(less.subset.wbm, verbose = F)

less.subset.wbm <- FindVariableFeatures(less.subset.wbm,
                                        selection.method = 'vst',
                                        nfeatures = 2000,
                                        verbose = F)

less.subset.wbm <- ScaleData(less.subset.wbm, verbose = F)

less.subset.wbm <- RunPCA(less.subset.wbm, features = VariableFeatures(less.subset.wbm))

ElbowPlot(less.subset.wbm)

# choosing 15 PCs

less.subset.wbm <- FindNeighbors(less.subset.wbm, dims = 1:15)

res <- seq(0,1, by = 0.05)
clstrs <- c()

for (i in res){
        x <- FindClusters(less.subset.wbm, resolution = i, verbose = F)
        clstrs <- c(clstrs, length(unique(x$seurat_clusters)))
        
}
plot(res,clstrs)

# Going with .2 and .7

less.subset.wbm <- FindClusters(less.subset.wbm, resolution = .2, verbose = F)
less.subset.wbm <- FindClusters(less.subset.wbm, resolution = .7, verbose = F)

less.subset.wbm <- RunUMAP(less.subset.wbm, dims = 1:15)
## Warning: The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
## To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
## This message will be shown once per session
DimPlot(less.subset.wbm, reduction = 'umap', label = T, repel = T) + NoLegend() + ggtitle ('Resolution 0.7')

DimPlot(less.subset.wbm, reduction = 'umap', label = T, repel = T, group.by = 'RNA_snn_res.0.2') +
        NoLegend() + ggtitle ('Resolution 0.2')

Control Experiment

less.cnt <- NormalizeData(less.cnt, verbose = F)

less.cnt <- FindVariableFeatures(less.cnt,
                                        selection.method = 'vst',
                                        nfeatures = 2000,
                                        verbose = F)

less.cnt <- ScaleData(less.cnt, verbose = F)

less.cnt <- RunPCA(less.cnt, features = VariableFeatures(less.cnt))

ElbowPlot(less.cnt)

# choosing 15 PCs

less.cnt <- FindNeighbors(less.cnt, dims = 1:15)

res <- seq(0,1, by = 0.05)
clstrs <- c()

for (i in res){
        x <- FindClusters(less.cnt, resolution = i, verbose = F)
        clstrs <- c(clstrs, length(unique(x$seurat_clusters)))
        
}
plot(res,clstrs)

# Going with .2 and .7

less.cnt <- FindClusters(less.cnt, resolution = .2, verbose = F)
less.cnt <- FindClusters(less.cnt, resolution = .7, verbose = F)

less.cnt <- RunUMAP(less.cnt, dims = 1:15)

DimPlot(less.cnt, reduction = 'umap', label = T, repel = T) + NoLegend() + ggtitle ('Resolution 0.7')

DimPlot(less.cnt, reduction = 'umap', label = T, repel = T, group.by = 'RNA_snn_res.0.2') +
        NoLegend() + ggtitle ('Resolution 0.2')

wbm <- readRDS('./data/v2/lesser.combined.integrated.rds')

wbm$State <- wbm$Condition

wbm$Condition <- ifelse(grepl('enr', wbm$Condition), 'Enriched', 'Not enriched')

wbm$Experiment <- ifelse(grepl('Mpl', wbm$State), 'Mpl',
                         ifelse(grepl('Migr', wbm$State), 'Migr1', 'Control'))

sumry <- read.table('./data/v2/summary_naming.tsv', header = T, sep = '\t')
# sumry

# new_levels <- sumry$final

new_levels <- c('Gran-1','Gran-2','SC','B cell-1','Gran-3','Monocyte','MEP/Mast',
                '?Prog','Macrophage','B cell-2','Erythrocyte', 'T cell',
                'Megakaryocyte','B cell-3', 'B cell-4')

names(new_levels)  <- levels(wbm)

DimPlot(wbm, reduction = 'umap', label = T, repel = T) + NoLegend()

#new_levels
wbm <- RenameIdents(wbm, new_levels)
wbm$new_cluster_IDs <- Idents(wbm)

DimPlot(wbm, reduction = 'umap', label = T, repel = T) + NoLegend() +
        ggtitle ('Integrated Dataset')

Cell Cross Tabulation

wbm.less.clusters <- less.subset.wbm@meta.data[,c('Condition','RNA_snn_res.0.2','RNA_snn_res.0.7')]

less.cnt.clusters <- less.cnt@meta.data[,c('Condition','RNA_snn_res.0.2','RNA_snn_res.0.7')]

wbm.clusters <- wbm@meta.data[,c('State','seurat_clusters')]

rownames(wbm.less.clusters) <- paste0('Mpl_', rownames(wbm.less.clusters))

rownames(less.cnt.clusters) <- paste0('Nbeal_', rownames(less.cnt.clusters))

mpl.clusters <- cbind(wbm.clusters[grepl('Mpl', rownames(wbm.clusters)),], wbm.less.clusters)
nbeal.clusters <- cbind(wbm.clusters[grepl('Nbeal', rownames(wbm.clusters)),], less.cnt.clusters)

Mpl Tables

mpl.tbl <- as.data.frame(table( mpl.clusters$RNA_snn_res.0.2, mpl.clusters$seurat_clusters))

colnames(mpl.tbl) <- c('Original Cluster','Integrated Cluster','Count')

mpl.tbl$Percentage <- NA

for (i in 0:length(unique(mpl.tbl$`Original Cluster`))){
        mpl.tbl[mpl.tbl$`Original Cluster` == i,]$Percentage <-
                round(mpl.tbl[mpl.tbl$`Original Cluster` == i,]$Count /
                              sum(wbm.less.clusters$RNA_snn_res.0.2 == i),2)*100
}


ggplot(data = mpl.tbl, aes(x = `Integrated Cluster`, y = `Original Cluster`)) +
        geom_tile(aes(fill = Percentage)) +
        scale_fill_gradient2(low = 'white', mid = 'red', high = 'darkred',midpoint = 50) +
        geom_text(aes(label = Count)) +
        ggtitle('Migr1/Mpl Experiment (res = 0.2)')

mpl.tbl <- as.data.frame(table( mpl.clusters$RNA_snn_res.0.7, mpl.clusters$seurat_clusters))

colnames(mpl.tbl) <- c('Original Cluster','Integrated Cluster','Count')

mpl.tbl$Percentage <- NA

for (i in 0:length(unique(mpl.tbl$`Original Cluster`))){
        mpl.tbl[mpl.tbl$`Original Cluster` == i,]$Percentage <-
                round(mpl.tbl[mpl.tbl$`Original Cluster` == i,]$Count /
                              sum(wbm.less.clusters$RNA_snn_res.0.7 == i),2)*100
}


ggplot(data = mpl.tbl, aes(x = `Integrated Cluster`, y = `Original Cluster`)) +
        geom_tile(aes(fill = Percentage)) +
        scale_fill_gradient2(low = 'white', mid = 'red', high = 'darkred',midpoint = 50) +
        geom_text(aes(label = Count)) +
        ggtitle('Migr1/Mpl Experiment (res = 0.7)')

DimPlot(wbm, reduction = 'umap', label = T, repel = T, group.by = 'seurat_clusters')

Most of the original clusters that get up into multiple clusters in the integrated umap occur within cell type groups. For example there are original clusters that are found in both integrated cluster 3 & 13 (both B-cell clusters), and same with clusters 0, 1 & 4 (Granulocytes). Of interest is to note some original clusters being split into integrated clusters 10 & 12, erythrocytes and MKs respectivaly. These are different cell types but with a close relative progenitor MEPs, so perhaps these are some MEPs being split up with the addition of the integrate Nbeal data.

Looking at Subdividing into States

Splitting up whether we are looking at enriched/non-enrich and between Mpl and Migr1

Going to stick with the resolution of 0.3, not overload figures

Migr1 Only
mpl.clusters2 <- mpl.clusters[grepl('Migr1',mpl.clusters$State),]
mpl.tbl <- as.data.frame(table( mpl.clusters2$RNA_snn_res.0.2, mpl.clusters2$seurat_clusters))

colnames(mpl.tbl) <- c('Original Cluster','Integrated Cluster','Count')

mpl.tbl$Percentage <- NA

for (i in 0:length(unique(mpl.tbl$`Original Cluster`))){
        mpl.tbl[mpl.tbl$`Original Cluster` == i,]$Percentage <-
                round(mpl.tbl[mpl.tbl$`Original Cluster` == i,]$Count /
                             sum(wbm.less.clusters[grepl('Migr1',mpl.clusters$State),]$RNA_snn_res.0.2 == i),2)*100
}


ggplot(data = mpl.tbl, aes(x = `Integrated Cluster`, y = `Original Cluster`)) +
        geom_tile(aes(fill = Percentage)) +
        scale_fill_gradient2(low = 'white', mid = 'red', high = 'darkred',midpoint = 50) +
        geom_text(aes(label = Count)) +
        ggtitle('All Migr1 Experiment (res = 0.2)')

mpl.clusters2 <- mpl.clusters[grepl('enrMigr1',mpl.clusters$State),]
mpl.tbl <- as.data.frame(table( mpl.clusters2$RNA_snn_res.0.2, mpl.clusters2$seurat_clusters))

colnames(mpl.tbl) <- c('Original Cluster','Integrated Cluster','Count')

mpl.tbl$Percentage <- NA

for (i in 0:length(unique(mpl.tbl$`Original Cluster`))){
        mpl.tbl[mpl.tbl$`Original Cluster` == i,]$Percentage <-
                round(mpl.tbl[mpl.tbl$`Original Cluster` == i,]$Count /
                          sum(wbm.less.clusters[grepl('enrMigr1',mpl.clusters$State),]$RNA_snn_res.0.2 == i),2)*100
}


ggplot(data = mpl.tbl, aes(x = `Integrated Cluster`, y = `Original Cluster`)) +
        geom_tile(aes(fill = Percentage)) +
        scale_fill_gradient2(low = 'white', mid = 'red', high = 'darkred',midpoint = 50) +
        geom_text(aes(label = Count)) +
        ggtitle('Enr Migr1 Experiment (res = 0.2)')

mpl.clusters2 <- mpl.clusters[mpl.clusters$State == 'Migr1',]
mpl.tbl <- as.data.frame(table( mpl.clusters2$RNA_snn_res.0.2, mpl.clusters2$seurat_clusters))

colnames(mpl.tbl) <- c('Original Cluster','Integrated Cluster','Count')

mpl.tbl$Percentage <- NA

for (i in 0:length(unique(mpl.tbl$`Original Cluster`))){
        mpl.tbl[mpl.tbl$`Original Cluster` == i,]$Percentage <-
                round(mpl.tbl[mpl.tbl$`Original Cluster` == i,]$Count /
                          sum(wbm.less.clusters[mpl.clusters$State == 'Migr1',]$RNA_snn_res.0.2 == i),2)*100
}


ggplot(data = mpl.tbl, aes(x = `Integrated Cluster`, y = `Original Cluster`)) +
        geom_tile(aes(fill = Percentage)) +
        scale_fill_gradient2(low = 'white', mid = 'red', high = 'darkred',midpoint = 50) +
        geom_text(aes(label = Count)) +
        ggtitle('Non-enriched Migr1 Experiment (res = 0.2)')

Migr1 Only
mpl.clusters2 <- mpl.clusters[grepl('Mpl',mpl.clusters$State),]
mpl.tbl <- as.data.frame(table( mpl.clusters2$RNA_snn_res.0.2, mpl.clusters2$seurat_clusters))

colnames(mpl.tbl) <- c('Original Cluster','Integrated Cluster','Count')

mpl.tbl$Percentage <- NA

for (i in 0:length(unique(mpl.tbl$`Original Cluster`))){
        mpl.tbl[mpl.tbl$`Original Cluster` == i,]$Percentage <-
                round(mpl.tbl[mpl.tbl$`Original Cluster` == i,]$Count /
                             sum(wbm.less.clusters[grepl('Mpl',mpl.clusters$State),]$RNA_snn_res.0.2 == i),2)*100
}


ggplot(data = mpl.tbl, aes(x = `Integrated Cluster`, y = `Original Cluster`)) +
        geom_tile(aes(fill = Percentage)) +
        scale_fill_gradient2(low = 'white', mid = 'red', high = 'darkred',midpoint = 50) +
        geom_text(aes(label = Count)) +
        ggtitle('All Mpl Experiment (res = 0.2)')

mpl.clusters2 <- mpl.clusters[grepl('enrMpl',mpl.clusters$State),]
mpl.tbl <- as.data.frame(table( mpl.clusters2$RNA_snn_res.0.2, mpl.clusters2$seurat_clusters))

colnames(mpl.tbl) <- c('Original Cluster','Integrated Cluster','Count')

mpl.tbl$Percentage <- NA

for (i in 0:length(unique(mpl.tbl$`Original Cluster`))){
        mpl.tbl[mpl.tbl$`Original Cluster` == i,]$Percentage <-
                round(mpl.tbl[mpl.tbl$`Original Cluster` == i,]$Count /
                          sum(wbm.less.clusters[grepl('enrMpl',mpl.clusters$State),]$RNA_snn_res.0.2 == i),2)*100
}


ggplot(data = mpl.tbl, aes(x = `Integrated Cluster`, y = `Original Cluster`)) +
        geom_tile(aes(fill = Percentage)) +
        scale_fill_gradient2(low = 'white', mid = 'red', high = 'darkred',midpoint = 50) +
        geom_text(aes(label = Count)) +
        ggtitle('Enr Mpl Experiment (res = 0.2)')

mpl.clusters2 <- mpl.clusters[mpl.clusters$State == 'Mpl',]
mpl.tbl <- as.data.frame(table( mpl.clusters2$RNA_snn_res.0.2, mpl.clusters2$seurat_clusters))

colnames(mpl.tbl) <- c('Original Cluster','Integrated Cluster','Count')

mpl.tbl$Percentage <- NA

for (i in 0:length(unique(mpl.tbl$`Original Cluster`))){
        mpl.tbl[mpl.tbl$`Original Cluster` == i,]$Percentage <-
                round(mpl.tbl[mpl.tbl$`Original Cluster` == i,]$Count /
                          sum(wbm.less.clusters[mpl.clusters$State == 'Mpl',]$RNA_snn_res.0.2 == i),2)*100
}


ggplot(data = mpl.tbl, aes(x = `Integrated Cluster`, y = `Original Cluster`)) +
        geom_tile(aes(fill = Percentage)) +
        scale_fill_gradient2(low = 'white', mid = 'red', high = 'darkred',midpoint = 50) +
        geom_text(aes(label = Count)) +
        ggtitle('Non-enriched Mpl Experiment (res = 0.2)')

Nbeal Tables

nbeal.tbl <- as.data.frame(table( nbeal.clusters$RNA_snn_res.0.2, nbeal.clusters$seurat_clusters))

colnames(nbeal.tbl) <- c('Original Cluster','Integrated Cluster','Count')

nbeal.tbl$Percentage <- NA

for (i in 0:length(unique(nbeal.tbl$`Original Cluster`))){
        nbeal.tbl[nbeal.tbl$`Original Cluster` == i,]$Percentage <-
                round(nbeal.tbl[nbeal.tbl$`Original Cluster` == i,]$Count /
                              sum(less.cnt.clusters$RNA_snn_res.0.2 == i),2)*100
}


ggplot(data = nbeal.tbl, aes(x = `Integrated Cluster`, y = `Original Cluster`)) +
        geom_tile(aes(fill = Percentage)) +
        scale_fill_gradient2(low = 'white', mid = 'red', high = 'darkred',midpoint = 50) +
        geom_text(aes(label = Count)) +
        ggtitle('Nbeal Experiment (res = 0.2)')

nbeal.tbl <- as.data.frame(table( nbeal.clusters$RNA_snn_res.0.7, nbeal.clusters$seurat_clusters))

colnames(nbeal.tbl) <- c('Original Cluster','Integrated Cluster','Count')

nbeal.tbl$Percentage <- NA

for (i in 0:length(unique(nbeal.tbl$`Original Cluster`))){
        nbeal.tbl[nbeal.tbl$`Original Cluster` == i,]$Percentage <-
                round(nbeal.tbl[nbeal.tbl$`Original Cluster` == i,]$Count /
                              sum(less.cnt.clusters$RNA_snn_res.0.7 == i),2)*100
}


ggplot(data = nbeal.tbl, aes(x = `Integrated Cluster`, y = `Original Cluster`)) +
        geom_tile(aes(fill = Percentage)) +
        scale_fill_gradient2(low = 'white', mid = 'red', high = 'darkred',midpoint = 50) +
        geom_text(aes(label = Count)) +
        ggtitle('Nbeal Experiment (res = 0.7)')

nbeal.clusters2 <- nbeal.clusters[nbeal.clusters$State == 'Nbeal_cntrl',]
nbeal.tbl <- as.data.frame(table( nbeal.clusters2$RNA_snn_res.0.2, nbeal.clusters2$seurat_clusters))

colnames(nbeal.tbl) <- c('Original Cluster','Integrated Cluster','Count')

nbeal.tbl$Percentage <- NA

for (i in 0:length(unique(nbeal.tbl$`Original Cluster`))){
        nbeal.tbl[nbeal.tbl$`Original Cluster` == i,]$Percentage <-
                round(nbeal.tbl[nbeal.tbl$`Original Cluster` == i,]$Count /
                              sum(less.cnt.clusters[less.cnt.clusters$Condition == 'Nbeal_cntrl',]$RNA_snn_res.0.2 == i),2)*100
}


ggplot(data = nbeal.tbl, aes(x = `Integrated Cluster`, y = `Original Cluster`)) +
        geom_tile(aes(fill = Percentage)) +
        scale_fill_gradient2(low = 'white', mid = 'red', high = 'darkred',midpoint = 50) +
        geom_text(aes(label = Count)) +
        ggtitle('non-enrNbeal Experiment (res = 0.2)')

nbeal.clusters2 <- nbeal.clusters[nbeal.clusters$State == 'enrNbeal_cntrl',]
nbeal.tbl <- as.data.frame(table( nbeal.clusters2$RNA_snn_res.0.2, nbeal.clusters2$seurat_clusters))

colnames(nbeal.tbl) <- c('Original Cluster','Integrated Cluster','Count')

nbeal.tbl$Percentage <- NA

for (i in 0:length(unique(nbeal.tbl$`Original Cluster`))){
        nbeal.tbl[nbeal.tbl$`Original Cluster` == i,]$Percentage <-
                round(nbeal.tbl[nbeal.tbl$`Original Cluster` == i,]$Count /
                              sum(less.cnt.clusters[less.cnt.clusters$Condition == 'enrNbeal_cntrl',]$RNA_snn_res.0.2 == i),2)*100
}


ggplot(data = nbeal.tbl, aes(x = `Integrated Cluster`, y = `Original Cluster`)) +
        geom_tile(aes(fill = Percentage)) +
        scale_fill_gradient2(low = 'white', mid = 'red', high = 'darkred',midpoint = 50) +
        geom_text(aes(label = Count)) +
        ggtitle('enrNbeal Experiment (res = 0.2)')